This vignette provides an example for creating pvalues objects for the volcano3D pipeline using DESeq2 and limma.
This example consists of a case study from the PEAC rheumatoid arthritis project (Pathobiology of Early Arthritis Cohort). The methodology has been published in ‘Lewis, Myles J., et al. “Molecular portraits of early rheumatoid arthritis identify clinical and treatment response phenotypes.” Cell reports 28.9 (2019): 2455-2470.’ (DOI: 10.1016/j.celrep.2019.07.091) with an interactive web tool available at https://peac.hpc.qmul.ac.uk.
Not yet publicly available:
install.packages("volcano3D")
library(devtools)
install_github("KatrionaGoldmann/volcano3D")
library(volcano3D)
The sample data used in this vignette can be loaded from the volcano3Ddata package. This is only possible after volcano3D is imported.
install.packages("volcano3Ddata")
To create a pvalues data frame we require:
These can both be loaded from the syn_txi dataset in the volcano3Ddata package.
library(volcano3Ddata)
data("syn_txi")
Check the alignment and make sure there are only three possible contrasts (in this case three unique Pathotypes)
if(! identical(syn_metadata$ID, colnames(syn_txi$abundance))){
stop("mis-aligned")
}
if(length(levels(syn_metadata$Pathotype)) != 3){
stop("The number of unique pathotypes must qual 3")
}
groups <- levels(syn_metadata$Pathotype)
contrasts <- c(paste(groups[1], groups[2], sep="-"),
paste(groups[2], groups[3], sep="-"),
paste(groups[3], groups[1], sep="-"))
Our comparisons of interest are set up as:
Fibroid-LymphoidLymphoid-MyeloidMyeloid-FibroidIn this vignette we will outline two methods to determine the differential gene expression:
Use the DESeq package to calculate the differential expression between groups. The vignette for this package can be found here
library(DESeq2)
dds = DESeqDataSetFromTximport(txi = syn_txi,
colData = syn_metadata,
design = ~Pathotype+Batch+Gender)
dds_DE <- DESeq(dds)
dds_LRT <- DESeq(dds, test = "LRT", reduced = ~Batch+Gender, parallel = TRUE)
Output the results into one dataframe. First get the results for each contrast:
Pvals_DESeq_DE <- lapply(contrasts, function(x){
vars <- unlist(strsplit(x, split="-"))
out <- results(dds_DE, contrast=c("Pathotype", vars))
out <- out[match(rownames(syn_txi$counts), rownames(out)), ]
out <- out[,c("pvalue", "padj", "log2FoldChange")]
})
Then get the multi-group test results. In this case we use the Likelihood ratio test - this compares the model using ~Pathotype+Batch+Gender to the reduced model using ~Batch+Gender.
LRT <- results(dds_LRT, parallel=TRUE)[, c("pvalue", "padj", "log2FoldChange")]
LRT <- LRT[match(rownames(syn_txi$counts), rownames(LRT)), ]
The results can then be combined into one pvalues data frame:
Pvals_DESeq <- cbind(Pvals_DESeq_DE[[1]],
Pvals_DESeq_DE[[2]],
Pvals_DESeq_DE[[3]],
LRT)
colnames(Pvals_DESeq) <- c(paste(rep(contrasts, each=3),
rep(colnames(Pvals_DESeq_DE[[1]]), 3)),
paste("LRT", colnames(LRT)))
Pvals_DESeq <- data.frame(Pvals_DESeq[complete.cases(Pvals_DESeq), ],
check.names = FALSE)
Now we can inspect:
head(Pvals_DESeq) %>%
kable() %>%
kable_styling(font_size=8.7)
| Fibroid-Lymphoid pvalue | Fibroid-Lymphoid padj | Fibroid-Lymphoid log2FoldChange | Lymphoid-Myeloid pvalue | Lymphoid-Myeloid padj | Lymphoid-Myeloid log2FoldChange | Myeloid-Fibroid pvalue | Myeloid-Fibroid padj | Myeloid-Fibroid log2FoldChange | LRT pvalue | LRT padj | LRT log2FoldChange | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| A2M | 0.7775361 | 0.8851935 | -0.0478326 | 0.1563045 | 0.3384292 | -0.2202768 | 0.1694654 | 0.4916679 | 0.2681094 | 0.2739571 | 0.4353436 | 0.0527816 |
| A2ML1 | 0.0002106 | 0.0011118 | 1.6854024 | 0.0000030 | 0.0000793 | -1.9299687 | 0.6348844 | 0.8441036 | 0.2445664 | 0.0000022 | 0.0000202 | 0.3983423 |
| A4GALT | 0.0000000 | 0.0000000 | 1.0248506 | 0.0000192 | 0.0003539 | -0.5646665 | 0.0055222 | 0.1009600 | -0.4601841 | 0.0000000 | 0.0000000 | 0.2121462 |
| A4GNT | 0.0238302 | 0.0610621 | -1.1527859 | 0.6691773 | 0.8180681 | -0.1857435 | 0.0197330 | 0.1893057 | 1.3385294 | 0.0732984 | 0.1583021 | -0.1939520 |
| AAAS | 0.8574281 | 0.9383721 | -0.0245296 | 0.6608837 | 0.8124146 | 0.0548697 | 0.8469982 | 0.9435306 | -0.0303402 | 0.9075226 | 0.9711813 | 0.3523114 |
| AACS | 0.2815302 | 0.4322015 | 0.1729592 | 0.0630182 | 0.1821943 | -0.2732040 | 0.5874802 | 0.8169632 | 0.1002448 | 0.1472059 | 0.2732938 | -0.2881377 |
Use the Limma package to calculate the differential expression between groups. The limma vignette can be found here and limma-voom vignette here.
library(limma)
library(edgeR)
syn_tpm = syn_txi$counts
# build the design matrix and contrast matrix
design <- model.matrix(~ 0 + syn_metadata$Pathotype + syn_metadata$Batch)
colnames(design) <- gsub("syn_metadata\\$Pathotype", "", colnames(design))
colnames(design) <- gsub("syn_metadata\\$Batch", "", colnames(design))
contrast.matrix <- makeContrasts(
paste0(colnames(design)[1] , "-", colnames(design)[2]),
paste0(colnames(design)[2] , "-", colnames(design)[3]),
paste0(colnames(design)[3] , "-", colnames(design)[1]),
levels = design)
# filter data and normalise
dge <- DGEList(counts = syn_tpm)
keep <- filterByExpr(dge, design)
dge <- dge[keep, , keep.lib.sizes=FALSE]
dge <- calcNormFactors(dge)
# voom
v <- voom(dge, design, plot=FALSE)
fit1 <- lmFit(v, design)
fit <- contrasts.fit(fit1, contrast.matrix)
fit <- eBayes(fit)
Now we can get the model fit results.
contrasts <- colnames(coefficients(fit))
Pvals_limma_DE <- lapply(contrasts, function(x){
id = which(colnames(coefficients(fit)) == x)
out <- topTable(fit, adjust.method = "fdr", coef= id, number=Inf,
sort.by="none")
rownames(out) <- make.names(out$ID, unique=T)
out <- out[,c("P.Value", "adj.P.Val", "logFC")]
colnames(out) <- c("pvalue", "padj", "log2FoldChange")
out
})
Pvals_overall <- topTable(fit, coef=1:3, adjust.method="fdr", number=Inf,
sort.by="none")[,c("P.Value", "adj.P.Val")]
colnames(Pvals_overall) <- c("pvalue", "padj")
The results can then be combined into one pvalues data frame:
Pvals_limma <- cbind(Pvals_limma_DE[[1]],
Pvals_limma_DE[[2]],
Pvals_limma_DE[[3]],
Pvals_overall)
colnames(Pvals_limma) <- c(paste(rep(contrasts, each=3),
rep(colnames(Pvals_limma_DE[[1]]), 3)),
paste("Overall", colnames(Pvals_overall)))
Now we can inspect:
head(Pvals_limma) %>%
kable() %>%
kable_styling(font_size=8.7)
| Fibroid-Lymphoid pvalue | Fibroid-Lymphoid padj | Fibroid-Lymphoid log2FoldChange | Lymphoid-Myeloid pvalue | Lymphoid-Myeloid padj | Lymphoid-Myeloid log2FoldChange | Myeloid-Fibroid pvalue | Myeloid-Fibroid padj | Myeloid-Fibroid log2FoldChange | Overall pvalue | Overall padj | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| A2M | 0.5170499 | 0.6366674 | -0.1121413 | 0.2279382 | 0.4481883 | -0.1979652 | 0.1303462 | 0.4559284 | 0.3101065 | 0.2869610 | 0.4293019 |
| A2ML1 | 0.0144789 | 0.0390096 | 1.2145059 | 0.0602010 | 0.1877927 | -0.8634615 | 0.5167596 | 0.7917087 | -0.3510444 | 0.0239250 | 0.0629112 |
| A4GALT | 0.0000000 | 0.0000000 | 1.0208963 | 0.0000197 | 0.0006240 | -0.5925725 | 0.0114855 | 0.1786983 | -0.4283238 | 0.0000000 | 0.0000000 |
| A4GNT | 0.0967374 | 0.1782220 | -0.9478684 | 0.8495848 | 0.9298853 | 0.0924637 | 0.1800195 | 0.5235976 | 0.8554047 | 0.2318880 | 0.3676664 |
| AAAS | 0.4801279 | 0.6027211 | 0.0810740 | 0.9907265 | 0.9954684 | -0.0012020 | 0.5395954 | 0.8042311 | -0.0798720 | 0.7611265 | 0.8432504 |
| AACS | 0.2047727 | 0.3179074 | 0.2305878 | 0.1038665 | 0.2708610 | -0.2669746 | 0.8557094 | 0.9511441 | 0.0363868 | 0.1867949 | 0.3139590 |
Both the deseq and limma-voom pvalues objects can then be used to create various plots with the volcano3D package as outlined in the vignette.
First lets load in the expression data.
library(volcano3D)
library(volcano3Ddata)
data("syn_data")
# Curate the expression data
syn_exp = syn_rld
rownames(syn_exp) = make.names(rownames(syn_exp), unique = T)
# Align the expression and pvalue data
syn_exp = syn_exp[intersect(rownames(Pvals_DESeq), rownames(syn_exp)), ]
Pvals_DESeq = Pvals_DESeq[intersect(rownames(Pvals_DESeq), rownames(syn_exp)), ]
syn_polar_deseq <- polar_coords(sampledata = syn_metadata,
contrast = "Pathotype",
pvalues = Pvals_DESeq,
expression = syn_exp,
p_col_suffix = "pvalue",
padj_col_suffix = "padj",
fc_col_suffix = "log2FoldChange",
multi_group_prefix = "LRT",
non_sig_name = "Not Significant",
significance_cutoff = 0.01,
label_column = NULL,
fc_cutoff = 0.1)
radial_plotly(polar = syn_polar_deseq,
colours=c("green3", "cyan", "gold2", "blue", "purple", "red"),
label_rows = c("SLAMF6", "PARP16", "ITM2C"))
p <- volcano3D(syn_polar_deseq,
label_rows = c("SLAMF6", "PARP16", "ITM2C"),
label_size = 10,
xy_aspectratio = 1,
z_aspectratio = 0.9)
p %>% plotly::layout(legend = list(x = 100, y = 0.5))
The only difference here is we change the multi_group_prefix to “Overall” and remove the fold change paramtere.
# Curate the expression data
syn_exp = syn_rld
rownames(syn_exp) = make.names(rownames(syn_exp), unique = T)
# Align the expression and pvalue data
syn_exp = syn_exp[intersect(rownames(Pvals_limma), rownames(syn_exp)), ]
Pvals_limma = Pvals_limma[intersect(rownames(Pvals_limma), rownames(syn_exp)), ]
syn_polar_limma <- polar_coords(sampledata = syn_metadata,
contrast = "Pathotype",
pvalues = Pvals_limma,
expression = syn_exp,
p_col_suffix = "pvalue",
padj_col_suffix = "padj",
fc_col_suffix = NULL,
multi_group_prefix = "Overall",
non_sig_name = "Not Significant",
significance_cutoff = 0.01,
label_column = NULL,
fc_cutoff = 0.1)
radial_plotly(polar = syn_polar_limma,
olours=c("green3", "cyan", "gold2", "blue", "purple", "red"),
label_rows = c("SLAMF6", "PARP16", "ITM2C"))
p <- volcano3D(syn_polar_limma,
label_rows = c("SLAMF6", "PARP16", "ITM2C"),
label_size = 10,
xy_aspectratio = 1,
z_aspectratio = 0.9)
p %>% plotly::layout(legend = list(x = 100, y = 0.5))